# This file provides an alternative method of estimating the incumbency advantage in Dahlgaard (2016) 
# through ranking candidates and simply comparing outcomes for the last winner and the first loser 

# Run it excluding closed and split lists: marginal races, marginal candidates, and cases within the IK bandwidth (IK-B)

# Start from a clean session: clear the workspace (including hidden objects)
# and close any open graphics devices.
rm(list = ls(all.names = TRUE))
graphics.off()
library(xtable)
library(dplyr)

# The relative paths below are resolved against the working directory, so
# point R at the replication folder before running this script, e.g.
#   setwd("path/to/replication/folder")
# (A bare `setwd()` call stops the script because `dir` has no default.)

# Source the main data analysis to obtain the Imbens-Kalyanaraman bandwidths
# (IK.bw and IK.bw2, used further down, are presumably defined there).

source("codes/dataan.R")

# Load the candidate-level data (expected to provide `data_alt`).
load("data/data_alt_boot.rdata")

# Generate blocks: one integer id per unique (year, muncpr, candpart)
# combination, numbered in order of first appearance in data_alt.

# Convert the keys first so the lookup table and data_alt share column types.
data_alt$year   <- as.factor(data_alt$year)
data_alt$muncpr <- as.factor(data_alt$muncpr)

# Build the lookup from the data frame columns themselves. Going through
# cbind() would force all three keys into one matrix type (factors become
# their integer codes, numbers become strings), which can leave the join
# keys mismatched or of incompatible types.
block       <- unique(data_alt[c("year", "muncpr", "candpart")])
block$block <- seq_len(nrow(block))

# Attach the block id to every candidate row.
data_alt <- left_join(data_alt, block, by = c("year", "muncpr", "candpart"))

# Limit to party lists with at least one elected candidate.
# `represented` is TRUE when the `elected` values of a
# (year, muncpr, candpart) list sum to more than zero.

data_alt_elected <-
  data_alt %>%
  group_by(year, muncpr, candpart) %>%
  summarise(represented = sum(elected) > 0) %>%
  ungroup()

# Join on explicit keys rather than on whichever columns happen to be shared,
# then keep represented lists only (rows where `represented` is NA are
# dropped by filter() as well).
data_alt <-
  data_alt %>%
  left_join(data_alt_elected, by = c("year", "muncpr", "candpart")) %>%
  filter(represented == 1)

# Keep candidates on open, non-split lists with complete data.
# Conditions passed to filter() separately are combined with AND; rows where
# any condition evaluates to NA are dropped.

data_alt <- filter(
  data_alt,
  openlist == 1,
  splitlist == 0,
  !is.na(margsim),
  !is.na(electedt1),
  !is.na(rerun),
  !is.na(block)
)

# Find, within each (year, muncpr, candpart) list, the lowest vote count among
# elected candidates and the highest vote count among non-elected candidates.

# Minimum for winners
data_min_votes <-
  filter(data_alt, elected == 1) %>%
  group_by(year, muncpr, candpart) %>%
  summarise(min_win = min(candvote)) %>%
  ungroup()

# Maximum for losers
data_max_votes <-
  filter(data_alt, elected == 0) %>%
  group_by(year, muncpr, candpart) %>%
  summarise(max_los = max(candvote)) %>%
  ungroup()

# Combine the two thresholds. The winners' table is the left side, so lists
# without any winner get no thresholds at all, while lists without any loser
# keep min_win and get max_los = NA. Keys are spelled out so the match never
# depends on which other columns the tables happen to share.
data_vote_cutoffs <-
  left_join(data_min_votes, data_max_votes,
            by = c("year", "muncpr", "candpart"))

# Merge on and select only the marginal candidates: the last winner(s) and the
# first loser(s) of each list (ties on the threshold are all kept).
data_rank_all <-
  data_alt %>%
  left_join(data_vote_cutoffs, by = c("year", "muncpr", "candpart")) %>%
  filter((elected == 1 & candvote == min_win) |
           (elected == 0 & candvote == max_los))

# Linear models on all marginal candidates. The block dummies
# (one per year-muncpr-candpart combination) mean the `elected` coefficient is
# identified from comparisons within the same list.
rank_model_all_reelect <- lm(
  formula = electedt1 ~ elected + as.factor(block),
  data = data_rank_all
)
rank_model_all_rerun <- lm(
  formula = rerun ~ elected + as.factor(block),
  data = data_rank_all
)

# Restrict to marginal races: keep rows where marg.los is not 0 and marg.win
# is not 1 (rows where either comparison is NA are dropped).
# NOTE(review): marg.los / marg.win are defined outside this script -- confirm
# that this condition selects the intended races.
data_rank_marg_race <- filter(data_rank_all, marg.los != 0, marg.win != 1)

# Same two specifications as for the full sample, on the restricted sample.
rank_model_marg_race_reelect <- lm(
  formula = electedt1 ~ elected + as.factor(block),
  data = data_rank_marg_race
)
rank_model_marg_race_rerun <- lm(
  formula = rerun ~ elected + as.factor(block),
  data = data_rank_marg_race
)

# Restrict the marginal-race sample to candidates whose margin (margsim) lies
# within the Imbens-Kalyanaraman bandwidth. Each outcome has its own
# bandwidth: IK.bw for re-election, IK.bw2 for rerunning.
data_rank_IKB_reelect <- filter(
  data_rank_marg_race,
  margsim >= -IK.bw,
  margsim <= IK.bw
)

data_rank_IKB_rerun <- filter(
  data_rank_marg_race,
  margsim >= -IK.bw2,
  margsim <= IK.bw2
)

# Same specification as above, each outcome on its own bandwidth sample.
rank_model_IKB_reelect <- lm(
  formula = electedt1 ~ elected + as.factor(block),
  data = data_rank_IKB_reelect
)

rank_model_IKB_rerun <- lm(
  formula = rerun ~ elected + as.factor(block),
  data = data_rank_IKB_rerun
)

# Collect the `elected` estimate, its standard error and the sample size for
# each of the six models in a 3 x 6 matrix, then print the transposed table
# as LaTeX.
rank_table <- local({
  # Column order: re-election models (all, marginal races, IK bandwidth),
  # followed by the rerun models in the same order.
  fits <- list(
    rank_model_all_reelect,
    rank_model_marg_race_reelect,
    rank_model_IKB_reelect,
    rank_model_all_rerun,
    rank_model_marg_race_rerun,
    rank_model_IKB_rerun
  )
  samples <- list(
    data_rank_all,
    data_rank_marg_race,
    data_rank_IKB_reelect,
    data_rank_all,
    data_rank_marg_race,
    data_rank_IKB_rerun
  )

  # Row 2 of the coefficient matrix is the first term after the intercept,
  # i.e. `elected`; columns 1 and 2 hold the estimate and its standard error.
  elected_stats <- vapply(
    fits,
    function(fit) summary(fit)$coefficients[2, 1:2],
    numeric(2)
  )

  rbind(elected_stats, vapply(samples, nrow, integer(1)))
})

dimnames(rank_table) <- list(
  c("Estimate", "Std. Error", "Observations"),
  c("All on open, single lists",
    "All in close races",
    "All within IK bandwidth",
    "All on open, single lists r",
    "All in close races r",
    "All within IK bandwidth r")
)

# xtable expects one digits column for the row names plus one per table
# column: 0 for row names, 3 for estimate and std. error, 0 for observations.
digits <- matrix(c(0, 3, 3, 0), nrow = 6, ncol = 4, byrow = TRUE)

xtable(t(rank_table), digits = digits)

